##############################
#
# Replication file for the main analysis in:
#
# Paths to Power:
# A new dataset on the social profile of governments
#
# For publication in the British Journal of Political Science
#
# Jacob Nyrup, Carl Henrik Knutsen, Peter Egge Langsæther, and Ina Lyftingsmo Kristiansen
# 
##################

### Open packages

pacman::p_load(here,tidyverse,haven,readxl,openxlsx,stringi,readr,
               hrbrthemes,scales,treemapify,RColorBrewer,viridis,extrafont,countrycode,scales,
               reactable,reactablefmtr,htmlwidgets,webshot,ggmap,ggthemes,ggplot2,WDI,
               wesanderson,ggtext,gridExtra,grid,ggpubr)

# Negated `%in%`: returns TRUE for every element of `x` that does not occur in `y`.
`%!in%` <- function(x, y) {
  !(x %in% y)
}

### Load data

# Individual-level file; `year` is converted to numeric via character.
df_panel <- read_rds("Data/PathstoPower_individuallevel_v1.0.rds")
df_panel <- mutate(df_panel, year = as.numeric(as.character(year)))

# Country-year file; rows without a country ISO code are dropped.
df_crosssectional <- read_excel("Data/PathstoPower_countryyear_v1.0.xlsx")
df_crosssectional <- filter(df_crosssectional, !is.na(country_isocode))

# Background variables; `year` gets the same numeric conversion as above.
df_background <- read_rds("Data/backgroundvariables.rds")
df_background <- mutate(df_background, year = as.numeric(as.character(year)))

# Comparison data used for the cross-validation plots in Figure 1.
df_alex <- read_rds("Data/df_alexiadou.rds")

df_lead <- read_rds("Data/df_lead.rds")

df_tmd <- read_rds("Data/df_tmd.rds")

###
# Basic information ---
###

# Unique countries

# Number of distinct country codes in the individual-level data.
n_countries <- n_distinct(df_panel$country_isocode)

# Unique ministers

# One row per distinct (country, name) pair, after excluding the three listed
# classifications. `distinct()` has no `na.rm` argument: the original
# `na.rm = TRUE` was evaluated as an extra column expression and added a
# constant `na.rm` column to the result, so it is removed here. The number of
# rows is unaffected.
n_ministers <- df_panel %>%
  filter(classification %!in% c("Representative to the United Nations",
                                "Governor (Central Bank)",
                                "Ambassador to the United States")) %>%
  distinct(country_isocode, name)

###
# Figure 1 - Cross-Validating PtP against other datasets
###

# Alexiadou

# Builds one cross-validation scatter panel for Figure 1.
#
# df       Data frame holding both share columns.
# x_var    Name (string) of the column taken from the external dataset.
# y_var    Name (string) of the matching PtP column.
# x_label  Title of the x axis.
# y_label  Title of the y axis.
# ...      Extra arguments forwarded to geom_count() (e.g. a fixed `size`).
#
# Returns a ggplot with both axes limited to [0, 1], a 45-degree reference
# line, and the Pearson correlation printed in the upper-left corner.
plot_crossvalidation <- function(df, x_var, y_var, x_label, y_label, ...) {
  ggplot(df, aes(x = .data[[x_var]], y = .data[[y_var]])) +
    geom_count(col = "#273046", show.legend = FALSE, fill = NA, ...) +
    labs(y = y_label, x = x_label) +
    theme_bw() +
    theme(panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(colour = "black"),
          legend.position = "none",
          axis.title.x = element_text(size = 10),
          axis.title.y = element_text(size = 10)) +
    geom_abline(intercept = 0, slope = 1, size = 1) +
    stat_cor(aes(label = ..r.label..), method = "pearson",
             label.x = 0, label.y = 0.95) +
    xlim(0, 1) +
    ylim(0, 1)
}

# Alexiadou: this is the only panel that passes a fixed point size to
# geom_count(), which overrides the count-based sizing.
alex1 <- plot_crossvalidation(
  df_alex, "mean_alex_highered", "mean_ptp_highered",
  x_label = "Share ministers with higher education (Alexiadou, 2019)",
  y_label = "Share ministers with higher education (PtP)",
  size = 1
)

# LEAD

lead1 <- plot_crossvalidation(
  df_lead, "mean_lead_milcareer", "mean_ptp_milcareer",
  x_label = "Share leaders with military career (LEAD, 2015)",
  y_label = "Share leaders with military career (PtP)"
)

# NOTE(review): `mean_ptp_higered` (no second "h") is kept exactly as in the
# original; the Alexiadou panel uses `mean_ptp_highered`. Confirm the column
# name against Data/df_lead.rds.
lead2 <- plot_crossvalidation(
  df_lead, "mean_lead_highered", "mean_ptp_higered",
  x_label = "Share leaders with higher education (LEAD, 2015)",
  y_label = "Share leaders with higher education (PtP)"
)

# Technocratic Minister Dataset

tmd1 <- plot_crossvalidation(
  df_tmd, "mean_tmd_politician", "mean_ptp_politician",
  x_label = "Share elected (TMD, 2021)",
  y_label = "Share elected (PtP)"
)

# Arrange the four panels in a grid and write them to disk.
ggsave(
  "Output/figure1.png",
  gridExtra::grid.arrange(alex1, tmd1, lead1, lead2),
  width = 10,
  height = 6,
  dpi = 1200
)

###
# Figure 2 - Country Averages on Core Variables 2000-2021.
###

# World polygons with an ISO3 code derived from the region name, used as the
# join key below.
map.world <- ggplot2::map_data("world") %>%
  dplyr::mutate(country_isocode = countrycode(region, "country.name", "iso3c"))

# Country averages of the four core variables over the years 2000-2021.

df_map <- df_crosssectional %>%
  filter(year < 2022 & year > 1999) %>%
  group_by(country_isocode) %>%
  dplyr::summarize(avg_education = mean(degree_total, na.rm = TRUE),
                   avg_abroad = mean(abroad_total, na.rm = TRUE),
                   avg_polfamiliy = mean(polfamily_total, na.rm = TRUE),
                   avg_polback = mean(pol_total, na.rm = TRUE)) %>%
  mutate(country_isocode = as.vector(country_isocode))

# Attach the averages to the polygons; Greenland is excluded from the map.
df_map_merged <- left_join(map.world, df_map, by = c("country_isocode")) %>%
  filter(region != "Greenland")

# Break points shared by all four legends. The labels must be paired with
# explicit breaks: a fixed label vector without `breaks` is matched against
# whatever breaks ggplot2 computes from the data, which can attach the wrong
# label to a value or fail when the lengths differ.
share_breaks <- c(0, 0.25, 0.5, 0.75, 1)

# Draws one choropleth world map.
#
# df            Polygon data with `long`, `lat`, `group` and the fill column.
# fill_var      Name (string) of the column mapped to the fill colour.
# legend_title  Title of the fill legend.
#
# Returns a ggplot object.
plot_share_map <- function(df, fill_var, legend_title) {
  ggplot(data = df, aes(x = long, y = lat, group = group)) +
    coord_cartesian(ylim = c(-50, 90)) +
    geom_polygon(aes(fill = .data[[fill_var]])) +
    theme_bw() +
    theme(line = element_blank(),
          axis.text = element_blank(),
          axis.title = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank()) +
    scale_fill_viridis(option = "mako", na.value = "grey", direction = -1,
                       breaks = share_breaks,
                       labels = paste0(share_breaks * 100, "%"),
                       begin = 0, end = 1) +
    labs(fill = legend_title)
}

# Education

map_shareeducation <- plot_share_map(df_map_merged, "avg_education",
                                     "Share attended\nuniversity")

# Study abroad

map_abroad <- plot_share_map(df_map_merged, "avg_abroad",
                             "Share studied\nabroad")

# Political family

map_polfamiliy <- plot_share_map(df_map_merged, "avg_polfamiliy",
                                 "Share from a\npolitical family")

# Politician

map_polback <- plot_share_map(df_map_merged, "avg_polback",
                              "Share political\nbackground")

# Print map

ggsave("Output/figure2.png",
       gridExtra::grid.arrange(map_shareeducation, map_polback, map_abroad, map_polfamiliy),
       width = 10,
       height = 6,
       dpi = 1200
)

####
# Figure 3 - Do democracies select more educated cabinet members?
####

# Indicator for the highest education category: degree codes 8 and 9 are
# coded 1, codes 1-7 and 10 are coded 0, and anything else becomes NA.
df_panel_figure3 <- df_panel %>%
  mutate(educationhigh = case_when(degree %in% c(1, 2, 3, 4, 5, 6, 7, 10) ~ 0,
                                   degree %in% c(8, 9) ~ 1))

# Education indicator of the leader rows.
# NOTE(review): if a country-year has more than one row with leader == 1, the
# join below duplicates that country-year -- confirm against the data.
df_leadereduc <- df_panel_figure3 %>%
  filter(leader == 1) %>%
  select(year, country_isocode, leader_education = educationhigh)

# Mean education indicator among core, non-leader rows per country-year.
# `.groups = "drop"` returns an ungrouped result instead of one that is still
# grouped by year.
df_ministereduc <- df_panel_figure3 %>%
  filter(core == 1 & leader == 0) %>%
  group_by(year, country_isocode) %>%
  summarize(minister_education = mean(educationhigh, na.rm = TRUE),
            .groups = "drop")

# Country-year skeleton with a numeric year, matching the join keys.
df_figure3 <- df_crosssectional %>%
  select(year, country_isocode) %>%
  mutate(year = as.numeric(as.character(year)))

# Merge

df_figure3 <- left_join(df_figure3, df_background, by = c("country_isocode", "year")) %>%
  left_join(., df_leadereduc, by = c("country_isocode", "year")) %>%
  left_join(., df_ministereduc, by = c("country_isocode", "year"))

# Yearly data

# Yearly means by regime type, reshaped so that leader and cabinet become two
# values of `group`.
df_figure3_yearly <- df_figure3 %>%
  group_by(year, democracy_polity) %>%
  summarize(mean_leader = mean(leader_education, na.rm = TRUE),
            mean_cabinet = mean(minister_education, na.rm = TRUE),
            .groups = "drop") %>%
  filter(!is.na(democracy_polity) & year > 1965) %>%
  mutate(democracy_polity = as.factor(democracy_polity)) %>%
  pivot_longer(cols = c(mean_leader, mean_cabinet),
               names_to = "group",
               values_to = "share_educated") %>%
  mutate(year = as.numeric(year))

# Make graph

ploteducation <- ggplot(data = df_figure3_yearly,
                        aes(x = year, y = share_educated,
                            linetype = group,
                            color = democracy_polity)) +
  geom_line(size = 1.1) +
  theme_bw() +
  theme(panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        legend.position = "right",
        legend.title = element_blank(),
        plot.title = element_text(size = 9),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10)) +
  scale_x_continuous(breaks = seq(1970, 2021, 10)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 0.75)) +
  ylab("") +
  xlab("") +
  labs(title = "% with a graduate degree") +
  scale_color_manual(values = c("#CB2314", "#273046"),
                     name = "Regime",
                     labels = c("Autocracy", "Democracy")) +
  scale_linetype_manual(values = c("solid", "dotdash"),
                        labels = c("Cabinet", "Leader"))

# Pass the plot explicitly rather than relying on ggplot2's last_plot().
ggsave("Output/figure3.png",
       plot = ploteducation,
       width = 6,
       height = 4,
       dpi = 1200,
       bg = "white")

###
# Figure 4 - Where do cabinet members have working-class occupations before entering politics?
###

# Produced by the separate "figure4" do-file

###
# Figure 5: Are male or female cabinet members most likely to come from a family of politicians?
###

# Share of core cabinet members from a political family, by decade and gender.
# Rows are restricted first (after 1965, gender Male/Female, core == 1); the
# Yes/No indicator is then recoded to 1/0 and each year assigned to a decade.
df_figure5 <- df_panel %>%
  filter(year > 1965 & gender %in% c("Male", "Female") & core == 1) %>%
  mutate(
    politicalfamily = recode(politicalfamily, "Yes" = 1, "No" = 0),
    decade = case_when(
      year < 1980 ~ "1966-1979",
      year < 1990 ~ "1980-1989",
      year < 2000 ~ "1990-1999",
      year < 2010 ~ "2000-2009",
      year < 2022 ~ "2010-2021",
      TRUE ~ NA_character_
    )
  ) %>%
  group_by(decade, gender) %>%
  summarize(
    mean_fam = mean(politicalfamily, na.rm = TRUE),
    n = n()
  )

# Distinct names with a non-missing political-family value.
n_graph <- df_panel %>%
  filter(!is.na(politicalfamily)) %>%
  distinct(name)

# One point and line segment per decade for each gender. The legend is hidden
# and the two series are labelled directly with text annotations.
figure5 <- ggplot(df_figure5,
                  aes(x = decade, y = mean_fam, group = gender, colour = gender)) +
  geom_point(stat = "summary", fun = sum, size = 3) +
  geom_line(stat = "summary", fun = sum, size = 0.75) +
  theme_bw() +
  ylab("") +
  xlab("") +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.position = "none",
    plot.title = element_text(size = 13),
    axis.text.x = element_text(size = 13),
    axis.text.y = element_text(size = 13),
    legend.title = element_text(size = 13),
    legend.text = element_text(size = 13)
  ) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 0.4)) +
  labs(fill = "Share ministers") +
  scale_color_manual(values = wes_palette(n = 3, name = "BottleRocket1"), name = "") +
  annotate("text", x = 1, y = 0.32, label = "Female", size = 4) +
  annotate("text", x = 1, y = 0.13, label = "Male", size = 4)

figure5

ggsave(
  "Output/figure5.png",
  plot = figure5,
  width = 200,
  height = 125,
  units = "mm",
  dpi = 1200,
  bg = "white"
)

###
# Figure 6: What did different types of cabinet members do before entering politics?
###

# Short display labels for the occupation categories. Category 15 is listed
# under both spellings found in the original script: three of the four
# pipelines used the long key, while the foreign-affairs pipeline used the
# short one, so that category could be left unrecoded in one panel only.
# Keys that do not occur in the data are ignored by recode().
occupation_labels <- c(
  "1. Academic (research and tertiary education)" = "Academic",
  "15. CEO/CFO/VP/Head of Large Branch/Senior Management/Owner large private company (more than 50 employees)" = "CEO/owner large private company",
  "15. CEO/owner large private company (more than 50 employees)" = "CEO/owner large private company",
  "21. Other white-collar job (private sector)" = "White-collar job (private sector)",
  "22. Other white-collar job (public sector)" = "White-collar job (public sector)",
  "6. Judge or lawyer" = "Judge or lawyer",
  "10. Work for a political party or an organization with strong ties to a party" = "Work for a political organization",
  "12. Military officer or soldier (army, navy and airforce)" = "Military",
  "7. Ambassador or working in the foreign service" = "Job in the foreign service"
)

# Summarises the most common occupations among the people in `df`.
#
# df              Subset of the individual-level data (already filtered to one
#                 type of office).
# minister_label  Value stored in the `minister` column of the result.
# n_top           Number of top categories to keep (ties are kept by top_n()).
#
# Each name is counted once. Shares are computed over rows with a non-missing
# occupation. Returns a tibble with `occupation_text`, `n`, `minister` and
# `share`.
top_occupations <- function(df, minister_label, n_top = 5) {
  df %>%
    distinct(name, .keep_all = TRUE) %>%
    group_by(occupation_text) %>%
    summarize(n = n()) %>%
    filter(!is.na(occupation_text)) %>%
    mutate(minister = minister_label,
           share = n / sum(n)) %>%
    top_n(n_top, share) %>%
    mutate(occupation_text = recode(occupation_text, !!!occupation_labels))
}

df_financeminister <- df_panel %>%
  filter(m_finance == 1) %>%
  top_occupations("Minister of Finance")

df_defense <- df_panel %>%
  filter(m_defense == 1) %>%
  top_occupations("Minister of Defense")

df_foreignaffairs <- df_panel %>%
  filter(m_foreignaffairs == 1) %>%
  top_occupations("Minister of Foreign Affairs")

df_leader <- df_panel %>%
  filter(leader == 1) %>%
  top_occupations("Leader")


### Plot

# Horizontal bar chart of occupation shares for one type of office.
#
# df            Data frame with `occupation_text` and `share` columns. The
#               default refers to an object named `dataset`, which is not
#               defined in this script, so callers are expected to pass `df`.
# typeminister  Plot title.
#
# Bars are ordered by share, the share axis is limited to [0, 0.5], and each
# bar carries a rounded percentage label nudged 0.075 along the share axis.
# Returns a ggplot object.
theme_occupation <- function(df = dataset, typeminister ="you are a title"){
  bars <- ggplot(df, aes(x = reorder(occupation_text, share), y = share)) +
    geom_col(fill = "darkblue") +
    coord_flip() +
    labs(x = NULL, y = NULL, title = typeminister)

  look <- theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    axis.text.y = element_text(size = 10),
    plot.title = element_text(size = 10, face = "plain"),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    plot.margin = unit(c(0.9, 0.1, 0, 1), "lines")
  )

  share_labels <- geom_text(
    aes(label = paste0(round(share * 100, 0), "%")),
    nudge_y = 0.075,
    na.rm = TRUE,
    size = 4
  )

  bars +
    theme_ipsum() +
    look +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 0.5)) +
    share_labels
}

# One panel per type of office, built with theme_occupation().
plot_leader <- theme_occupation(df = df_leader, typeminister = "Leader")
plot_defense <- theme_occupation(df = df_defense, typeminister = "Minister of Defense")
plot_foreignaffairs <- theme_occupation(df = df_foreignaffairs, typeminister = "Minister of Foreign Affairs")
plot_finance <- theme_occupation(df = df_financeminister, typeminister = "Minister of Finance")

# Arrange the four panels in a grid and save the combined figure.
combinedplot <- gridExtra::grid.arrange(
  plot_leader,
  plot_defense,
  plot_foreignaffairs,
  plot_finance
)

ggsave(
  filename = "Output/figure6.png",
  plot = combinedplot,
  width = 200,
  height = 125,
  units = "mm",
  dpi = 1200,
  bg = "white"
)

###
# Figure 7: Where have the world’s cabinet members studied?
###

# Core rows with a non-missing degree.
df_ir <- df_panel %>% filter(!is.na(degree) & core == 1)

# Rows per year (all countries). `name =` is spelled out: the original
# `n = "..."` only worked through partial matching on tally()'s `name`
# argument.
nworld <- df_ir %>% group_by(year) %>% tally(name = "n_world")

# Rows per country-year, with the code SUN recoded to RUS.
ncountry <- df_ir %>%
  mutate(country_isocode = recode(country_isocode, "SUN" = "RUS")) %>%
  group_by(year, country_isocode) %>%
  tally(name = "n_country")

# Yearly counts per country code, one table for each of the five
# `abroad_isocode*` columns (SUN recoded to RUS).
studyincountry_1 <- df_ir %>% mutate(abroad_isocode1 = recode(abroad_isocode1, "SUN" = "RUS")) %>%
  group_by(year, abroad_isocode1) %>% tally(name = "country1")

studyincountry_2 <- df_ir %>% mutate(abroad_isocode2 = recode(abroad_isocode2, "SUN" = "RUS")) %>%
  group_by(year, abroad_isocode2) %>% tally(name = "country2")

studyincountry_3 <- df_ir %>% mutate(abroad_isocode3 = recode(abroad_isocode3, "SUN" = "RUS")) %>%
  group_by(year, abroad_isocode3) %>% tally(name = "country3")

studyincountry_4 <- df_ir %>% mutate(abroad_isocode4 = recode(abroad_isocode4, "SUN" = "RUS")) %>%
  group_by(year, abroad_isocode4) %>% tally(name = "country4")

studyincountry_5 <- df_ir %>% mutate(abroad_isocode5 = recode(abroad_isocode5, "SUN" = "RUS")) %>%
  group_by(year, abroad_isocode5) %>% tally(name = "country5")

# Combine the five counts per (year, country) and divide by the number of rows
# from all other countries in that year.
# NOTE(review): the joins are anchored on `abroad_isocode1`, so a country that
# appears in a given year only in columns 2-5 is dropped for that year.
graphir <- left_join(studyincountry_1, studyincountry_2, by = c("year", "abroad_isocode1" = "abroad_isocode2")) %>%
  left_join(., studyincountry_3, by = c("year", "abroad_isocode1" = "abroad_isocode3")) %>%
  left_join(., studyincountry_4, by = c("year", "abroad_isocode1" = "abroad_isocode4")) %>%
  left_join(., studyincountry_5, by = c("year", "abroad_isocode1" = "abroad_isocode5")) %>%
  left_join(., nworld, by = c("year")) %>%
  left_join(., ncountry, by = c("year", "abroad_isocode1" = "country_isocode")) %>%
  ungroup() %>%
  # Vectorised row sum replaces rowwise()/c_across(). A country with no rows of
  # its own in a year has a missing n_country; it is treated as 0 so that the
  # share is not turned into NA.
  mutate(sharestudythere =
           rowSums(cbind(country1, country2, country3, country4, country5), na.rm = TRUE) /
           (n_world - coalesce(n_country, 0L)))

# Keep the five plotted destination countries, from 1966 onwards.
importantcountries <- graphir %>%
  filter(abroad_isocode1 %in% c("GBR", "USA", "FRA", "RUS", "CHN") & year > 1965 & !is.na(abroad_isocode1)) %>%
  mutate(year = as.numeric(year))

# One line per destination country. The legend is hidden and the lines are
# labelled with text annotations; the x axis runs to 2026 to leave room for
# them.
studycountry <- ggplot(importantcountries, aes(x = year, y = sharestudythere, colour = abroad_isocode1)) +
  geom_line(size = 1) + theme_bw() + ylab("") + xlab("") +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
        legend.position = "none", plot.title = element_text(size = 13),
        axis.title.x = element_text(size = 18),
        axis.title.y = element_text(size = 12),
        axis.text.x = element_text(size = 13),
        axis.text.y = element_text(size = 13)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0, 0.15)) +
  scale_x_continuous(limits = c(1966, 2026)) +
  labs(fill = "Share ministers") +
  scale_color_manual(values = wes_palette(n = 6, name = "BottleRocket1")) +
  annotate("text", x = 2021, y = 0.13, label = "United States", size = 4) +
  annotate("text", x = 2024, y = 0.10, label = "Great\nBritain", size = 4) +
  annotate("text", x = 2023, y = 0.04, label = "France", size = 4) +
  annotate("text", x = 2023, y = 0.025, label = "Russia", size = 4) +
  annotate("text", x = 2022.5, y = 0.005, label = "China", size = 4)

# Pass the plot explicitly rather than relying on ggplot2's last_plot().
ggsave("Output/figure7.png", plot = studycountry,
       width = 270, height = 150, units = "mm",
       dpi = 1200,
       bg = "white")

###
# Figure 8: Where are Democratic and Republican cabinet members born?
###

# Google Maps API key. It is read from the GGMAP_GOOGLE_API_KEY environment
# variable when that is set, so the key does not have to be written into the
# script; otherwise the original placeholder is used and must be replaced for
# replication.
register_google(key = Sys.getenv("GGMAP_GOOGLE_API_KEY", unset = "KEY")) ## Insert Google-key for replication

# Get politicians

# US rows after 1992 with a non-missing party, one row per name; party levels
# are ordered to match the legend labels and colours below.
usa_politicians <- df_panel %>%
  filter(country_isocode == "USA" & year > 1992 & !is.na(party)) %>%
  distinct(name, .keep_all = TRUE) %>%
  mutate(party = fct_relevel(party, c("dem", "rep", "independent")))

# Get map

usamap <- get_map(location = "united states",
                  zoom = 3,
                  maptype = "hybrid",
                  source = "google",
                  color = "color")

# Birthplaces are jittered by up to 0.7 degrees in each direction. A fixed
# seed in position_jitter() makes the figure identical across runs without
# touching the global random number generator; the original geom_jitter()
# call drew a fresh random displacement on every run.
map_ministers <- ggmap(usamap) +
  geom_point(
    aes(x = pob_longitude, y = pob_latitude, color = party),
    data = usa_politicians, alpha = .8, na.rm = TRUE, size = 4,
    position = position_jitter(width = 0.7, height = 0.7, seed = 1)
  ) +
  scale_color_manual(labels = c("Democrat", "Republican", "Independent"),
                     values = c("#00008B", "#FF7F7F", "#5A5A5A"),
                     name = "") +
  scale_x_continuous(limits = c(-125, -60), expand = c(0, 0)) +
  scale_y_continuous(limits = c(20, 50), expand = c(0, 0)) +
  labs(x = NULL,
       y = NULL) +
  theme_minimal() +
  theme(
    plot.title = element_markdown(size = 11),
    legend.text = element_text(size = 14),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  ) +
  guides(color = guide_legend(override.aes = list(size = 4)))

map_ministers

# Pass the plot explicitly rather than relying on ggplot2's last_plot().
ggsave("Output/figure8.png", plot = map_ministers,
       width = 270, height = 150, units = "mm",
       dpi = 1200,
       bg = "white")

###
# Figure 9: Where did British cabinet ministers attend university?
###

# British core rows with a non-missing degree, one row per name. A missing
# university, or a degree code of 4, 5 or 6, is labelled
# "Did not attend university".
df_fig9country <- df_panel %>%
  filter(country_isocode == "GBR", core == 1, !is.na(degree)) %>%
  distinct(name, .keep_all = TRUE) %>%
  mutate(
    university = replace_na(university, "Did not attend university"),
    university = if_else(degree %in% c(4, 5, 6),
                         "Did not attend university",
                         as.character(university))
  )

# Categories shown individually in the treemap; every other value is pooled
# into "Other universities".
named_categories <- c("University of Oxford", "University of Cambridge",
                      "Did not attend university", "London School of Economics and Political Science - University of London",
                      "The University of Edinburgh", "Durham University")

# Count per category, add the treemap subgroup code (`sort`), then shorten two
# of the labels for display.
universities <- df_fig9country %>%
  mutate(university = if_else(university %in% named_categories,
                              as.character(university),
                              "Other universities")) %>%
  count(university) %>%
  mutate(
    sort = recode(university,
                  "Did not attend university" = 3,
                  "University of Oxford" = 2,
                  "University of Cambridge" = 2,
                  "London School of Economics and Political Science - University of London" = 2,
                  "The University of Edinburgh" = 2,
                  "Durham University" = 2,
                  "Other universities" = 3),
    university = recode(university,
                        "London School of Economics and Political Science - University of London" = "LSE",
                        "The University of Edinburgh" = "Uni. of Edinburgh")
  )

# Treemap with tile area and fill both driven by the count.
ggplot(universities, aes(area = n, fill = n, subgroup = sort)) +
  geom_treemap() +
  geom_treemap_text(
    aes(label = paste0(university, " (", n, ")")),
    colour = "white",
    place = "centre",
    family = "mono",
    fontface = "bold",
    min.size = 8
  ) +
  scale_fill_viridis(end = 0.3, direction = -1, option = "E") +
  theme_bw() +
  theme(
    legend.position = "none",
    plot.title = element_text(family = "mono", face = "bold", size = (20))
  )

# Saves the most recently created plot, i.e. the treemap above.
ggsave(
  "Output/figure9.png",
  dpi = 1200,
  width = 14,
  height = 8
)
